# Basic RDD computations

from pyspark import SparkConf,SparkContext
import os
# Point PySpark worker processes at a specific local Python interpreter.
# NOTE(review): hard-coded Windows path — must be adjusted per machine.
os.environ["PYSPARK_PYTHON"]="D:/Soft/Python/Python310/python.exe"

if __name__ == '__main__':
    # Demo of three basic RDD transformations: map, flatMap, reduceByKey.
    spark_conf = SparkConf().setMaster("local[*]").setAppName("calc")
    spark_ctx = SparkContext(conf=spark_conf)

    # map: multiply each element by 10, then add 5 to each result
    numbers = spark_ctx.parallelize([1, 2, 3, 4, 5])
    scaled = numbers.map(lambda n: n * 10).map(lambda n: n + 5)
    print(scaled.collect())

    # flatMap: split each phrase into words, flattening into one RDD
    phrases = spark_ctx.parallelize(["yang yi she", "shang jian xing", "wo shi shui"])
    words = phrases.flatMap(lambda phrase: phrase.split(" "))
    print(words.collect())

    # reduceByKey: sum the values that share the same key
    pairs = spark_ctx.parallelize([("男", 22), ("女", 34), ("男", 52), ("女", 14)])
    print(pairs.reduceByKey(lambda x, y: x + y).collect())

    spark_ctx.stop()

